package io.chenglicun.common.utils;


import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/**
 * Static helpers for scraping HTML content from remote pages with jsoup.
 */
public class SpiderUtils {

    /** CSS query matching the container whose class attribute starts with {@code rich_media_content}. */
    private static final String CONTENT_SELECTOR = "div[class^=rich_media_content]";

    /** Attribute that carries the image URL to be promoted to {@code src}. */
    private static final String LAZY_SRC_ATTR = "data-src";

    /**
     * Downloads the page at {@code url} and returns the inner HTML of its first
     * {@code rich_media_content} container.
     *
     * <p>Before serialising, every {@code <img>} inside the container that has a
     * non-blank {@code data-src} attribute gets that value copied into {@code src};
     * the {@code data-src} attribute itself is always removed. Images without a
     * usable {@code data-src} keep their existing {@code src} untouched.
     * (Presumably the source page lazy-loads images through {@code data-src} —
     * confirm against a live article.)
     *
     * <p>Finally every {@code &quot;} entity in the serialised HTML is replaced by a
     * single quote character.
     *
     * @param url address of the article page to fetch
     * @return the processed inner HTML of the content container, or {@code null}
     *         when the page has no matching container
     * @throws Exception if the page cannot be fetched or parsed
     */
    public static String fetchRichTextFromWeChatArticle(String url) throws Exception {
        Document document = Jsoup.connect(url).get();
        Element contentElement = document.select(CONTENT_SELECTOR).first();
        if (contentElement == null) {
            return null;
        }

        Elements imgs = contentElement.select("img");
        for (Element img : imgs) {
            // attr() yields "" for a missing attribute; copying that blindly would
            // wipe out a perfectly valid src on images that are not lazy-loaded.
            String lazySrc = img.attr(LAZY_SRC_ATTR);
            if (!lazySrc.trim().isEmpty()) {
                img.attr("src", lazySrc);
            }
            img.removeAttr(LAZY_SRC_ATTR);
        }

        // Literal substitution: no regex needed, so use replace() rather than replaceAll().
        return contentElement.html().replace("&quot;", "'");
    }

}
